.libPaths(c('/home/data/t210344/R/x86_64-pc-linux-gnu-library/4.3','/usr/local/lib/R/library','/refdir/Rlib','/home/data/t210344/R/x86_64-pc-linux-gnu-library/4.2'))
library(ReactomePA)
##
## ReactomePA v1.46.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
library(ggplot2)
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'sp'
## The following object is masked from 'package:IRanges':
##
## %over%
## 'SeuratObject' was built under R 4.3.2 but the current version is
## 4.4.0; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.6.5 but the current
## version is 1.7.0; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
##
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:IRanges':
##
## intersect
## The following object is masked from 'package:S4Vectors':
##
## intersect
## The following object is masked from 'package:BiocGenerics':
##
## intersect
## The following objects are masked from 'package:base':
##
## %||%, intersect, t
##
## Attaching package: 'Seurat'
## The following object is masked from 'package:base':
##
## %||%
library(pheatmap)
library(reshape2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
## The following object is masked from 'package:S4Vectors':
##
## expand
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(RColorBrewer)
library(ggsci)
library(ggrepel)
library(ggplotify)
library(cowplot)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
library(viridis)
## Loading required package: viridisLite
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
Bcell <- readRDS("/home/data/t210344/Project/5Dataset/5annotationB/Bcell.annotated.rds")
Idents(Bcell) <- 'celltype'
col<-colorRampPalette((pal_npg("nrc")(9)))(13)
COLORS_NAMES <- levels(Bcell$celltype)
names(col) <- COLORS_NAMES
col["MZB-2"] <- "#CCAC2C"
health = subset(Bcell,subset=group==c("HD"))
primary=subset(Bcell,subset=group==c("HNSCC"))
Oropharynx = subset(primary,subset=subsite==c("Oropharynx"))
Non_tonsil = subset(primary,subset=subsite==c("Larynx/Hypopharynx","Nasopharynx","Oral cavity"))
## Warning in subsite == c("Larynx/Hypopharynx", "Nasopharynx", "Oral cavity"):
## longer object length is not a multiple of shorter object length
a1 <- UMAPPlot(object = health, group.by = "celltype", pt.size = 0.3, repel=T, label=F, label.size=5, cols = col) +
labs(title = "Healthy Tonsil")+NoLegend();a1
a2 <- UMAPPlot(object = Oropharynx, group.by = "celltype", pt.size = 0.3, repel=T, label=F, label.size=5, cols = col) +
labs(title = "HNSCC Tonsil")+NoLegend();a2
a3 <- UMAPPlot(object = Non_tonsil, group.by = "celltype", pt.size = 0.3, repel=T, label=F, label.size=5, cols = col) +
labs(title = "HNSCC Non-tonsil");a3
a2 <- a2
a1+a2+a3
#ggsave("plots/UMAP_celltype_HDtonsilvsHNSCCtonsilvsHNSCCnon-tonsil.pdf", width=width.ppi*2.5 ,height=height.ppi)
sce = subset(Bcell,subset=group==c("HD","HNSCC"))
cellnum <- table(sce$celltype,sce$orig.ident)
cellnum_prop <- prop.table(cellnum, margin = 2)
cell.prop <- as.data.frame(cellnum_prop)
colnames(cell.prop) <- c("Celltype","orig.ident","Proportion")
clinical_metadata <- sce@meta.data[, c("orig.ident","group","subsite")]
unique_clinical_metadata <- unique(clinical_metadata)
rownames(unique_clinical_metadata)=NULL
unique_clinical_metadata <- unique_clinical_metadata %>% distinct(orig.ident, .keep_all = TRUE)
cellnum_with_clinical <- left_join(cell.prop, unique_clinical_metadata, by = "orig.ident")
table(cellnum_with_clinical$group)
##
## HD HNSCC
## 143 1664
table(cellnum_with_clinical$subsite)
##
## Larynx/Hypopharynx Nasopharynx Oral cavity Oropharynx
## 130 13 1066 442
## Unknown
## 13
cellnum_with_clinical$groupby[cellnum_with_clinical$group=="HD"] <- 'HD'
cellnum_with_clinical$groupby[cellnum_with_clinical$subsite %in% c("Larynx/Hypopharynx","Nasopharynx","Oral cavity")] <- 'Non-tonsil'
cellnum_with_clinical$groupby[cellnum_with_clinical$subsite %in% "Oropharynx"] <- 'Tonsil'
filtered_data <- cellnum_with_clinical %>%
mutate(Proportion = Proportion * 100)
filtered_data <- cellnum_with_clinical %>%
filter(Celltype %in% c("Naive","GCB","PB")) %>%
mutate(Proportion = Proportion * 100)
p <- ggboxplot(filtered_data, x = "groupby", y = "Proportion",select = c("HD", "Non-tonsil","Tonsil"),
order = c("HD","Tonsil", "Non-tonsil"),
color = "Celltype", palette = col,
add = "jitter",
facet.by = "Celltype", short.panel.labs = FALSE) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.title.x=element_blank(), legend.position = "right")
my_comparisons <- list( c("Non-tonsil", "Tonsil"),c("Non-tonsil", "HD"),c("Tonsil", "HD") )
p + stat_compare_means(comparisons = my_comparisons,label = "p.signif",label.y=c(50,70,90))
## Warning in wilcox.test.default(c(0, 13.4854771784232, 0, 0, 0,
## 1.08695652173913, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0, 6.22406639004149, 10, 35.7142857142857, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.763358778625954, 29.253112033195, 0, :
## cannot compute exact p-value with ties
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
# Figure 3A-2
sce = subset(Bcell,subset=group==c("HD","HNSCC"))
cellnum <- table(sce$celltype,sce$orig.ident)
cellnum_prop <- prop.table(cellnum, margin = 2)
cell.prop <- as.data.frame(cellnum_prop)
colnames(cell.prop) <- c("Celltype","orig.ident","Proportion")
clinical_metadata <- sce@meta.data[, c("orig.ident","group","subsite")]
unique_clinical_metadata <- unique(clinical_metadata)
rownames(unique_clinical_metadata)=NULL
unique_clinical_metadata <- unique_clinical_metadata %>% distinct(orig.ident, .keep_all = TRUE)
cellnum_with_clinical <- left_join(cell.prop, unique_clinical_metadata, by = "orig.ident")
table(cellnum_with_clinical$group)
##
## HD HNSCC
## 143 1664
table(cellnum_with_clinical$subsite)
##
## Larynx/Hypopharynx Nasopharynx Oral cavity Oropharynx
## 130 13 1066 442
## Unknown
## 13
cellnum_with_clinical$groupby[cellnum_with_clinical$group=="HD"] <- 'HD'
cellnum_with_clinical$groupby[cellnum_with_clinical$subsite %in% c("Larynx/Hypopharynx","Nasopharynx","Oral cavity")] <- 'Non-tonsil'
cellnum_with_clinical$groupby[cellnum_with_clinical$subsite %in% "Oropharynx"] <- 'Tonsil'
filtered_data <- cellnum_with_clinical %>%
mutate(Proportion = Proportion * 100)
filtered_data <- cellnum_with_clinical %>%
filter(Celltype %in% c("Naive","GCB","PB")) %>%
mutate(Proportion = Proportion * 100)
p <- ggboxplot(filtered_data, x = "groupby", y = "Proportion",select = c("HD", "Non-tonsil","Tonsil"),
order = c("HD","Tonsil", "Non-tonsil"),
color = "Celltype", palette = col,
add = "jitter",
facet.by = "Celltype", short.panel.labs = FALSE) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.title.x=element_blank(), legend.position = "right")
my_comparisons <- list( c("Non-tonsil", "Tonsil"),c("Non-tonsil", "HD"),c("Tonsil", "HD") )
p + stat_compare_means(comparisons = my_comparisons,label = "p.signif",label.y=c(50,70,90))
## Warning in wilcox.test.default(c(0, 13.4854771784232, 0, 0, 0,
## 1.08695652173913, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0, 6.22406639004149, 10, 35.7142857142857, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.763358778625954, 29.253112033195, 0, :
## cannot compute exact p-value with ties
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
# Figure 3B
#library(future)
#plan("multisession", workers = 4)
#degs = lapply(unique(Bcell$celltype), function(x){
# sub_obj = subset(Bcell, subset = celltype == x)
# Idents(sub_obj) <- "group"
# FindMarkers(sub_obj, ident.1 = "HNSCC", ident.2 = "HD", min.cells.group = 2)
#})
#saveRDS(degs, file = "7-HD-HNSC/degs_list.rds")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%||%() masks Seurat::%||%(), SeuratObject::%||%(), base::%||%()
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ gridExtra::combine() masks dplyr::combine(), Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ purrr::discard() masks scales::discard()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
degs <- readRDS("/home/data/t210344/Project/5Dataset/7-HD-HNSC/deg.rds")
names(degs) <- unique(Bcell$celltype)
df2 <- data.frame()
top_n_df <- data.frame()
top10sig0_df <- data.frame()
for (i in 1:length(degs)){
dat <- degs[[i]]
dat$cell_type <- names(degs)[[i]]
dat$gene <- rownames(dat)
dat$change <- ifelse(dat$avg_log2FC>0 & dat$p_val_adj<0.05,'up',
ifelse(dat$avg_log2FC<0 & dat$p_val_adj<0.05,'down','nochange'))
dat$label <- ifelse(dat$p_val_adj<0.01,"adjust P-val<0.01","adjust P-val>=0.01")
dat_sig <- dat[dat$change!='nochange',]
up_top2 <- top_n(dat_sig[dat_sig$change=='up',],3,avg_log2FC)[,'gene']
down_top2 <- top_n(dat_sig[dat_sig$change=='down',],-3,avg_log2FC)[,'gene']
top10sig0 <- filter(dat_sig) %>% distinct(gene,.keep_all = T) %>% top_n(5,abs(avg_log2FC))
label_gene <- c(up_top2,down_top2)
top_n_df <- rbind(top_n_df,dat[label_gene,])
df2 <- rbind(df2,dat)
top10sig0_df <- rbind(top10sig0_df,top10sig0)
}
rownames(df2) <- 1:nrow(df2)
rownames(top10sig0_df) <- 1:nrow(top10sig0_df)
df3 <- df2
df2 <- df2[df2$change!='nochange',]
df2$size <- case_when(!(df2$gene %in% top10sig0_df$gene)~ 1,
df2$gene %in% top10sig0_df$gene ~ 2)
dt <- filter(df2,size==1)
head(dt)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cell_type gene
## 1 2.789735e-08 -1.1818619 0.322 0.905 0.001045816 Plasmablast VIMP
## 2 5.231351e-08 -1.1143559 0.349 0.952 0.001961129 Plasmablast ATP5G2
## 3 7.689704e-08 -0.8867564 0.267 0.857 0.002882716 Plasmablast FAM46C
## 4 1.083211e-07 -0.2608247 0.000 0.190 0.004060741 Plasmablast RP11-66N24.3
## 5 1.920983e-07 -0.4974562 0.151 0.619 0.007201381 Plasmablast AKAP17A
## 6 1.004393e-06 -0.8551306 0.356 1.000 0.037652669 Plasmablast TCEB2
## change label size
## 1 down adjust P-val<0.01 1
## 2 down adjust P-val<0.01 1
## 3 down adjust P-val<0.01 1
## 4 down adjust P-val<0.01 1
## 5 down adjust P-val<0.01 1
## 6 down adjust P-val>=0.01 1
p1 <- ggplot()+
geom_jitter(data = dt,
aes(x = factor(cell_type, levels = levels(Bcell$celltype)), y = avg_log2FC, color = label),
size = 0.85,
width =0.4)
p1
p2 <- p1+
geom_jitter(data = top10sig0_df,
aes(x = cell_type, y = avg_log2FC, color = label),
size = 1,
width =0.4)
p2
up_bar <- top_n(group_by(df2,cell_type),1,avg_log2FC)
down_bar <- top_n(group_by(df2,cell_type),-1,avg_log2FC)
bar_df <- data.frame(row.names=up_bar$cell_type,x=up_bar$cell_type,
up=up_bar$avg_log2FC+0.3,
down=down_bar$avg_log2FC-0.3)
bar_df <- bar_df[sort(unique(df2$cell_type)),]
bar_df$down <- ifelse(bar_df$down>=0,NA,bar_df$down)
ggplot()+geom_col(data = bar_df,aes(x=x,y=up),alpha = 0.2)+
geom_col(data = bar_df,aes(x=x,y=down),alpha = 0.2)
p3 <- p2+geom_col(data = bar_df,aes(x=x,y=up),alpha = 0.2)+
geom_col(data = bar_df,aes(x=x,y=down),alpha = 0.2)
p3
box_df <- data.frame(x=(levels(Bcell$celltype)),y=0)
p4 <- p3+geom_tile(data = box_df,aes(x=x,y=y),height=0.6,fill=col,color = "black")
p4
p5 <- p4+
geom_text_repel(
data=top10sig0_df,
aes(x=cell_type,y=avg_log2FC,label=gene),
force = 1.2,
arrow = arrow(length = unit(0.008, "npc"),
type = "open", ends = "last")
)
p5
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p6 <- p5 +
scale_color_manual(name=NULL,
values = c("red","black"))
p6
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p7 <- p6+
labs(x="CellType",y="average log2FC")+
geom_text(data=dt,
aes(x=cell_type,y=0,label=cell_type),
cols = col,
size =4,
color ="black")
## Warning in geom_text(data = dt, aes(x = cell_type, y = 0, label = cell_type), :
## Ignoring unknown parameters: `cols`
p7
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p8 <- p7+
theme_minimal()+
theme(
axis.title = element_text(size = 13,
color = "black",
face = "bold"),
axis.line.y = element_line(color = "black",
size = 1.2),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
panel.grid = element_blank(),
legend.position = "top",
legend.direction = "vertical",
legend.justification = c(1,0),
legend.text = element_text(size = 15)
)
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p8
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#ggsave("plots/HD_HNSC_multiple_comparison.pdf",p8, width=16 ,height=6)
df <- readr::read_csv(
"/home/data/t210344/Project/5Dataset/7-HD-HNSC/HDv.s.HNSCC_GSEA/Bcells_GO_BP_cluster_simplified.csv",
show_col_types = FALSE
)
## New names:
## • `` -> `...1`
if ("...1" %in% names(df)) df <- dplyr::select(df, -`...1`)
df$GeneRatio_num <- vapply(strsplit(as.character(df$GeneRatio), "/"),
function(z) if (length(z) == 2)
suppressWarnings(as.numeric(z[1]) / as.numeric(z[2]))
else NA_real_, numeric(1))
df_top <- df |>
dplyr::filter(!is.na(.data$p.adjust)) |>
dplyr::group_by(.data$Cluster) |>
dplyr::slice_min(order_by = .data$p.adjust, n = 5, with_ties = FALSE) |>
dplyr::ungroup() |>
dplyr::mutate(
Description_wrapped = stringr::str_wrap(.data$Description, width = 50),
Description_ord = forcats::fct_reorder(Description_wrapped, .data$p.adjust, .desc = TRUE)
)
p <- ggplot2::ggplot(
df_top,
ggplot2::aes(x = -log10(.data$p.adjust),
y = .data$Description_ord,
color = .data$Cluster,
size = .data$GeneRatio_num)
) +
ggplot2::geom_point() +
ggplot2::labs(x = expression(-log[10]("adj p")),
y = NULL,
size = "GeneRatio",
title = "GO BP (simplified)") +
ggplot2::theme_bw() +
ggplot2::theme(axis.text.y = ggplot2::element_text(size = 9),
plot.title = ggplot2::element_text(hjust = 0.5))
print(p)
# Figure 3D
suppressPackageStartupMessages({
library(msigdbr)
library(GSEABase)
library(GSVA)
library(pheatmap)
library(dplyr)
})
av <- read.csv("/home/data/t210344/Project/5Dataset/7-HD-HNSC/AverageExpression-0.8.csv",
row.names = 1, check.names = FALSE)
av <- as.matrix(av)
mode(av) <- "numeric"
gset <- msigdbr(species = "Homo sapiens",
category = "C2",
subcategory = "REACTOME") %>%
dplyr::select(gs_name, gene_symbol)
gset_set <- split(x = gset$gene_symbol, f = gset$gs_name)
gene_sets <- lapply(names(gset_set), function(nm) {
GeneSet(
geneIds = unique(gset_set[[nm]]),
setName = nm,
geneIdType = SymbolIdentifier(),
organism = "Homo sapiens"
)
})
gsc <- GeneSetCollection(gene_sets)
params <- gsvaParam(exprData = av,
geneSets = gsc,
kcdf = "Gaussian")
es.max <- gsva(params)
## No annotation package name available in the input data object.
## Attempting to directly match identifiers in data to gene sets.
## Estimating GSVA scores for 1614 gene sets.
## Estimating ECDFs with Gaussian kernels
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
df = do.call(rbind,
lapply(1:ncol(es.max), function(i){
data.frame(
path = rownames(es.max),
cluster = colnames(es.max)[i],
sd.1 = es.max[,i],
sd.2 = apply(es.max[,-i], 1, median)
)
}))
df$fc = df$sd.1 - df$sd.2
top <- df %>% group_by(cluster) %>% top_n(5, fc)
n = es.max[top$path, ]
n <- n[!duplicated(rownames(n)), ]
rownames(n) <- gsub('REACTOME_', '', rownames(n))
pheatmap(n, show_colnames = TRUE,
scale = "row", angle_col = 45,
cluster_row = TRUE, cluster_col = TRUE,
color = colorRampPalette(c("navy", "white", "firebrick3"))(50))
c <- c("REACTOME_CLASS_I_MHC_MEDIATED_ANTIGEN_PROCESSING_PRESENTATION",
"REACTOME_ANTIGEN_ACTIVATES_B_CELL_RECEPTOR_BCR_LEADING_TO_GENERATION_OF_SECOND_MESSENGERS",
"REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR",
"REACTOME_MHC_CLASS_II_ANTIGEN_PRESENTATION",
"REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION",
"REACTOME_SIGNALING_BY_MET",
"REACTOME_SIGNALING_BY_NOTCH1",
"REACTOME_INTERFERON_SIGNALING",
"REACTOME_SIGNALING_BY_TGFB_FAMILY_MEMBERS",
"REACTOME_SIGNALING_BY_TGF_BETA_RECEPTOR_COMPLEX",
"REACTOME_AUTOPHAGY",
"REACTOME_REGULATED_NECROSIS",
"REACTOME_ANTIVIRAL_MECHANISM_BY_IFN_STIMULATED_GENES",
"REACTOME_CELL_CELL_COMMUNICATION",
"REACTOME_EPH_EPHRIN_SIGNALING",
"REACTOME_CELL_SURFACE_INTERACTIONS_AT_THE_VASCULAR_WALL",
"REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL")
c <- c[c %in% rownames(es.max)]
pheatmap(es.max[c,], show_colnames = TRUE,
scale = "row", angle_col = 45,
cluster_row = TRUE, cluster_col = FALSE,
color = colorRampPalette(c("navy", "white", "firebrick3"))(50))